#### PREAMBLE ####

library(ggplot2)
library(geosphere)
library(dplyr)
library(data.table)
library(tidyr)
library(zoo)
library(sf)
library(ggspatial)
library(RColorBrewer)
library(estimatr)
library(broom)
library(texreg)
library(did)


#### TEMPLATE ####
goldenScatterCAtheme <- theme(
  panel.background = element_rect(fill = "white"),
  aspect.ratio = ((1 + sqrt(5))/2)^(-1),
  axis.ticks.length = unit(0.5, "char"),
  axis.line.x.top = element_line(size = 0.2),
  axis.line.x.bottom = element_line(size = 0.2),
  axis.ticks.x = element_line(size = 0.2),
  axis.text.x = element_text(color = "black", size = 10),
  axis.title.x = element_text(size = 10, 
                              margin = margin(t = 7.5, r = 0, b = 0, l = 0)),
  axis.ticks.y = element_blank(),
  axis.text.y = element_text(color = "black", size = 10,
                             margin = margin(t = 0, r = -4, b = 0, l = 0)),
  axis.title.y = element_text(size = 10,
                              margin = margin(t = 0, r = 7.5, b = 0, l = 0)),
  #legend.key = element_rect(fill = NA, color = NA),
  panel.grid.major.x = element_blank(),
  panel.grid.major.y = element_line(color = "gray45", size = 0.2),
  strip.background = element_blank(),
  strip.text.x = element_text(size = 8),
  strip.text.y = element_blank(),
  strip.placement = "outside",
  panel.spacing.x = unit(1.25, "lines"),
  panel.spacing.y = unit(1, "lines")
)
class(goldenScatterCAtheme)
#### LOAD DATA ####

set.seed(20230711)

#load main data
df <- read.csv("Outputs/Analysis all muns.csv", stringsAsFactors = F, na.strings = c("", "NA"))
df$INEGI <- sprintf("%05d", df$INEGI)
df$INEGI <- as.character(df$INEGI)
df$time <- as.Date(df$time)

#### TRANSFORMATION ####

#states treated by US trade restrictions
unique(df$State[df$ustrade==1])
df$ustrade[df$time>=as.Date("2016-06-01") & df$trade==1 & (df$State=="MICHOACAN")] <- 1
#updating ustrade.
df$ustrade <- df$ustrade * df$trade

#### CREATING VARIABLES FOR LATER DISAGGREGATED GROUP-TIME ATEs ####

#create group variable based on when each unit gets treated
df$new_time <- match(df$time, sort(unique(df$time)))

temp <- df %>%
  group_by(INEGI) %>% 
  filter(trade == 1) %>% 
  filter(new_time == min(new_time)) %>% 
  ungroup()
temp$min_time <- temp$new_time
temp <- temp[,c("INEGI", "min_time")]

df <- left_join(df, temp, by=c("INEGI"="INEGI"))
df$min_time[is.na(df$min_time)] <- 0
unique(df$min_time)


temp <- df %>%
  group_by(INEGI) %>% 
  filter(ustrade == 1) %>% 
  filter(new_time == min(new_time)) %>% 
  ungroup()
temp$min_time_us <- temp$new_time
temp <- temp[,c("INEGI", "min_time_us")]

df <- left_join(df, temp, by=c("INEGI"="INEGI"))
df$min_time_us[is.na(df$min_time_us)] <- 0
unique(df$min_time_us)


#analysis
df$INEGI_num <- as.numeric(df$INEGI)


#### LOAD AND JOIN PUBLIC EXPENDITURE DATA ####

# Load necessary library
library(data.table) # Using data.table for faster file reading

# Define the path to the folder containing the CSV files
folder_path <- "Source data/Public expenditure/conjunto_de_datos"

# Define a pattern to match files ending with the years 2011 to 2019
year_pattern <- paste0("(", paste(2011:2019, collapse = "|"), ")\\.csv$")

# Get a list of all CSV files in the folder that match the year pattern
csv_files <- list.files(path = folder_path, pattern = year_pattern, full.names = TRUE)

# Load all matching CSV files into a list
csv_list <- lapply(csv_files, fread) # fread is used from data.table package for faster reading

combined_data <- rbindlist(csv_list)
gc()

spend <- combined_data[combined_data$TEMA=="Egresos" & combined_data$DESCRIPCION_CATEGORIA=="Total de egresos",]
rm(combined_data, csv_list)
gc()

spend$ID_ENTIDAD <- sprintf("%02d", spend$ID_ENTIDAD)
spend$ID_MUNICIPIO <- sprintf("%03d", spend$ID_MUNICIPIO)
spend$INEGI <- paste0(spend$ID_ENTIDAD, spend$ID_MUNICIPIO)
spend <- spend[,c("INEGI", "ANIO", "VALOR")]
gc()
colnames(spend)[3] <- "Total_spending"

spend <- spend[spend$INEGI %in% df$INEGI,]

table(spend$INEGI, spend$ANIO)

#linear interpolation for missing data

# Load necessary libraries
library(dplyr)
library(tidyr)
library(zoo)

# Create a complete dataset with all INEGI and ANIO combinations
complete_spend <- expand.grid(
  INEGI = unique(spend$INEGI),
  ANIO = seq(min(spend$ANIO), max(spend$ANIO), by = 1)
)

# Join the complete dataset with the original dataset
spend_complete <- complete_spend %>%
  left_join(spend, by = c("INEGI", "ANIO"))

# Function to interpolate missing values
interpolate_missing <- function(df) {
  df %>%
    group_by(INEGI) %>%
    arrange(ANIO) %>%
    mutate(Total_spending = na.approx(Total_spending, na.rm = FALSE, rule = 2)) %>%
    ungroup()
}

# Apply the interpolation function
spend_imputed <- interpolate_missing(spend_complete)

spend <- spend_imputed
rm(spend_imputed, spend_complete, complete_spend)
gc()

df <- left_join(df, spend, by=c("Year"="ANIO", "INEGI"="INEGI"))


#### SUBSET TO GENETIC MATCHED SAMPLE ####

matched <- read.csv("Outputs/genetic_matched_muns.csv",stringsAsFactors = F)
matched$INEGI <- sprintf("%05d", matched$INEGI)
matched$INEGI <- as.character(matched$INEGI)

df <- df[df$INEGI %in% matched$INEGI,]


#### TANCITARO ####

df$Total_spending <- df$Total_spending/1000000

tancitaro <- df[df$Municipality=="TANCITARO",]


ggplot(data=tancitaro, aes(x=time, y=homicides)) +
  geom_line() +
  annotate("rect", xmin=as.Date("2013-01-01"), xmax=as.Date("2014-01-01"), ymin=-Inf, ymax=Inf, alpha=0.4, fill="grey") +
  theme_classic() +
  ylab("Monthly homicides") +
  xlab("") +
  theme(axis.text = element_text(size = 14), axis.title = element_text(size = 18))

h <- 6
w <- 1.618*h
# Save plot
ggsave("Outputs/plots/Fig9_tancitaro_hom.pdf", width = w, height = h)

ggplot(data=tancitaro, aes(x=time, y=Total_spending))+
  geom_line()+
  annotate("rect", xmin=as.Date("2013-01-01"), xmax=as.Date("2014-01-01"), ymin=-Inf, ymax=Inf, alpha=0.4, fill="grey") +
  theme_classic() +
  scale_y_continuous(limits=c(0, 145))+
  ylab("Total spending (Mil Pesos)") +
  xlab("") +
  theme(axis.text = element_text(size = 14), axis.title = element_text(size = 18))
  
h <- 6
w <- 1.618*h
# Save plot
ggsave("Outputs/plots/Fig14_tancitaro_total_spend.pdf", width = w, height = h)

  


#### MAP OF TREAT_GROUP ####

library(ggplot2)
library(ggspatial)
library(ggpattern)

#create data of treat_group by municipality
map_data <- df[,c("INEGI", "treat_group")]
map_data <- map_data %>% 
  distinct(INEGI, .keep_all=T)

map_data$treat_group[map_data$treat_group=="Turnson"] <- "Export certified between 2011-2019"
map_data$treat_group[map_data$treat_group=="Alwayson"] <- "Export certified before 2011"

#municipalities shape file
mun <- st_read("Source data/Map files/00mun.shp")
st_crs(mun)
#states shape file
state <- st_read("Source data/Map files/Entidades_2010_5.shp")

#MERGE QUERY AND MUN DATASETS
mun <- left_join(mun, map_data, by = c("CVEGEO"="INEGI"))

mun$treat_group[is.na(mun$treat_group)] <- "Missing data or excluded"

#relevel treat group factor
map_data$treat_group <- factor(map_data$treat_group, levels=c("Missing data", "Control", "Export certified before 2011", "Export certified between 2011-2020"))

#pick colors
display.brewer.all(n=NULL, type="all", select=NULL, exact.n=TRUE)
colors <- brewer.pal(n=9, name="Pastel1")
white <- "white"
grey <- "gray85"
red <- colors[1]
green <- colors[3]
darkred <- brewer.pal(n=9, name="Reds")[5]
darkgreen <- brewer.pal(n=9, name="Greens")[5]
black <- "black"

#add centroids so you can plot state names
state <- cbind(state, st_coordinates(st_centroid(state)))
#rename some states
state$NOM_ENT <- as.character(state$NOM_ENT)
state$NOM_ENT
state$NOM_ENT[3] <- "Mexico"
state$NOM_ENT[14] <- "Michoacan"
state$NOM_ENT[21] <- "Queretaro"
state$NOM_ENT[22] <- "San Luis Potosi"
state$NOM_ENT[27] <- "Nuevo Leon"
state$NOM_ENT[30] <- "Yucatan"
state$NOM_ENT[26] <- "Veracruz"
state$NOM_ENT[28] <- "Coahuila"
state$NOM_ENT[1] <- ""

mun <- st_transform(mun, crs=6372)
st_crs(mun)
state <- st_transform(state, crs=6372)
st_crs(state)

us_trade_muns <- unique(df$INEGI[df$ustrade==1])
mun$ustrade <- NA_character_
mun$ustrade <- ""
mun$ustrade[mun$CVEGEO %in% us_trade_muns] <- ", Able to export avocados to the U.S."
mun$treat_group <- paste0(mun$treat_group, mun$ustrade)

h <- 6
w <- 1.618*h
#create pdf of map with states and municipalities and query presence in 2010
#Plot the data
ggplot() +
  ggspatial::annotation_spatial(mun) +
  ggspatial::layer_spatial(mun, aes(fill=treat_group), color="gray14", linewidth=.05)+
  scale_fill_manual(values=c(grey, red, darkred, green, darkgreen, white))+
  geom_sf(data = state, fill=NA, color=black, linewidth=.4) +
  geom_text(data = state, aes(X, Y, label = NOM_ENT), size = 4)+
  theme_void()+
  coord_sf(xlim = c(2000000, 3000000), ylim = c(500000, 1200000), expand = FALSE)+
  theme(legend.position = c(0.2, 0.2), legend.title=element_blank(), legend.key=element_blank())
ggsave("Outputs/plots/Fig4_Mexico_state_mun_treat_groups_gen_match.png", width=w, height=h)

#

#### DIFF-IN-DIFFS - HOMICIDES ####

df1 <- df
inegis_missing_spend <- unique(df$INEGI[is.na(df$Total_spending)])
df <- df[!df$INEGI %in% inegis_missing_spend,]

### INTENTIONAL HOMICIDES ###

lm1mh <- lm_robust(malicious_homicides ~ trade, data=df, clusters=INEGI,
                   fixed_effects = ~factor(INEGI) + factor(time),
                   se_type="stata")
lm2mh <- lm_robust(malicious_homicides ~ trade + ustrade, data=df, clusters=INEGI,
                   fixed_effects = ~factor(INEGI) + factor(time),
                   se_type="stata")
lm3mh <- lm_robust(malicious_homicides ~ trade + ustrade + Total_spending, data=df, clusters=INEGI,
                   fixed_effects = ~factor(INEGI) + factor(time),
                   se_type="stata")
lm4mh <- lm_robust(malicious_homicides ~ trade + ustrade + Total_spending + time*factor(INEGI), data=df, clusters=INEGI,
                          fixed_effects = ~factor(time),
                          se_type="stata")
summary(lm1mh)
summary(lm2mh)
summary(lm3mh)
summary(lm4mh)

lm1h <- lm_robust(homicides ~ trade, data=df, clusters=INEGI,
                   fixed_effects = ~factor(INEGI) + factor(time),
                   se_type="stata")
lm2h <- lm_robust(homicides ~ trade + ustrade, data=df, clusters=INEGI,
                   fixed_effects = ~factor(INEGI) + factor(time),
                   se_type="stata")
lm3h <- lm_robust(homicides ~ trade + ustrade + Total_spending, data=df, clusters=INEGI,
                   fixed_effects = ~factor(INEGI) + factor(time),
                   se_type="stata")
lm4h <- lm_robust(homicides ~ trade + ustrade + Total_spending + time*factor(INEGI), data=df, clusters=INEGI,
                          fixed_effects = ~factor(time),
                          se_type="stata")
summary(lm1h)
summary(lm2h)
summary(lm3h)
summary(lm4h)

lm1gh <- lm_robust(gun_homicides ~ trade, data=df, clusters=INEGI,
                  fixed_effects = ~factor(INEGI) + factor(time),
                  se_type="stata")
lm2gh <- lm_robust(gun_homicides ~ trade + ustrade, data=df, clusters=INEGI,
                  fixed_effects = ~factor(INEGI) + factor(time),
                  se_type="stata")
lm3gh <- lm_robust(gun_homicides ~ trade + ustrade + Total_spending, data=df, clusters=INEGI,
                  fixed_effects = ~factor(INEGI) + factor(time),
                  se_type="stata")
lm4gh <- lm_robust(gun_homicides ~ trade + ustrade + Total_spending + time*factor(INEGI), data=df, clusters=INEGI,
                  fixed_effects = ~factor(time),
                  se_type="stata")
summary(lm1gh)
summary(lm2gh)
summary(lm3gh)
summary(lm4gh)

#

#### EFFECT SIZE - US TRADE ####

# Calculate means of covariates
unique_INEGI <- unique(df$INEGI)
time_levels <- sort(unique(df$time))

# Create all combinations of unique_INEGI and time_levels
new_data_0 <- expand.grid(
  INEGI = unique_INEGI,
  time = time_levels
)

# Set trade, ustrade, and Total_spending to NA
new_data_0$trade <- NA_real_
new_data_0$ustrade <- NA_real_
new_data_0$Total_spending <- NA_real_
new_data_0$trade <- 1
new_data_0$ustrade <- 0
new_data_0$Total_spending <- mean(df$Total_spending, na.rm=T)



# Create all combinations of unique_INEGI and time_levels
new_data_1 <- expand.grid(
  INEGI = unique_INEGI,
  time = time_levels
)

# Set trade, ustrade, and Total_spending to NA
new_data_1$trade <- NA_real_
new_data_1$ustrade <- NA_real_
new_data_1$Total_spending <- NA_real_
new_data_1$trade <- 1
new_data_1$ustrade <- 1
new_data_1$Total_spending <- mean(df$Total_spending, na.rm=T)

new_data_0$INEGI <- as.character(new_data_0$INEGI)
new_data_1$INEGI <- as.character(new_data_1$INEGI)

new_data_0$time_factor <- factor(new_data_0$time, levels = levels(factor(df$time)))
new_data_1$time_factor <- factor(new_data_1$time, levels = levels(factor(df$time)))
new_data_0$INEGI_factor <- factor(new_data_0$INEGI, levels = levels(factor(df$INEGI)))
new_data_1$INEGI_factor <- factor(new_data_1$INEGI, levels = levels(factor(df$INEGI)))

#update model
df$time_factor <- factor(df$time, levels=levels(factor(df$time)))
df$INEGI_factor <- factor(df$INEGI, levels=levels(factor(df$INEGI)))
df$trade <- as.numeric(df$trade)
lm4mh_pred <- lm_robust(malicious_homicides ~ trade + ustrade + Total_spending + time*INEGI_factor + time_factor, data=df, clusters=INEGI,
                   #fixed_effects = ~time_factor,
                   se_type="stata")

# Predict mh values
pred_0 <- predict(lm4mh_pred, newdata = new_data_0)
pred_1 <- predict(lm4mh_pred, newdata = new_data_1)

# Average predicted values over all time levels and INEGI levels
pred_mean_0 <- mean(pred_0)
pred_mean_1 <- mean(pred_1)

### EFFECT SIZE ###
effect_size <- (pred_mean_1 - pred_mean_0) / pred_mean_0
effect_size




#### DIFF-IN-DIFFS - NONVIOLENT CRIMES ####

lm4hr <- lm_robust(home_robbery ~ trade + ustrade + Total_spending + time*factor(INEGI), data=df, clusters=INEGI,
                   fixed_effects = ~factor(time),
                   se_type="stata")
lm4br <- lm_robust(business_robbery ~ trade + ustrade + Total_spending + time*factor(INEGI), data=df, clusters=INEGI,
                    fixed_effects = ~factor(time),
                    se_type="stata")
lm4tr <- lm_robust(transport_robbery ~ trade + ustrade + Total_spending + time*factor(INEGI), data=df, clusters=INEGI,
                    fixed_effects = ~factor(time),
                    se_type="stata")
lm4pr <- lm_robust(personal_robbery ~ trade + ustrade + Total_spending + time*factor(INEGI), data=df, clusters=INEGI,
                    fixed_effects = ~factor(time),
                    se_type="stata")
lm4ex <- lm_robust(extortion ~ trade + ustrade + Total_spending + time*factor(INEGI), data=df, clusters=INEGI,
                    fixed_effects = ~factor(time),
                    se_type="stata")

summary(lm4hr)
summary(lm4br)
summary(lm4tr)
summary(lm4pr)
summary(lm4ex)

#### EVENT STUDY - VIOLENCE, MEXICAN TREATMENT, CALLAWAY-SANT'ANNA SEs ####

# Load data and fit the models
attgt_mh_2011 <- att_gt(yname = "malicious_homicides",
                        tname = "new_time",
                        idname = "INEGI_num",
                        gname = "min_time",
                        xformla = NULL,
                        data = df[df$time_group == 2011,],
                        control_group = "notyettreated")

agg_effects_2011 <- aggte(attgt_mh_2011, type = "dynamic", na.rm = TRUE)

# Convert results to a data frame for plotting
event_study_data_2011 <- data.frame(
  term = agg_effects_2011$egt,
  estimate = agg_effects_2011$att.egt,
  conf.low = agg_effects_2011$att.egt - 1.96 * agg_effects_2011$se.egt,
  conf.high = agg_effects_2011$att.egt + 1.96 * agg_effects_2011$se.egt
)
event_study_data_2011$conf.high[event_study_data_2011$conf.high>5] <- 5 #set vertical limit just for plot

# Plot
ggplot(data = event_study_data_2011, aes(x = term, y = estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0) +
  theme_classic() +
  geom_hline(yintercept = 0, linetype = 2) +
  geom_vline(xintercept = 0, linetype = 2) +
  ylab("Estimated difference in conditional means \nMonthly intentional homicides") +
  xlab("Time since Mexican certification (Months, 2011 panel)") +
  scale_x_continuous(limits = c(-100, 110), breaks=c(-96, -72, -48, -24, 0, 24, 48, 72, 96)) +
  scale_y_continuous(limits = c(-5, 5)) +
  theme(axis.text = element_text(size = 14), axis.title = element_text(size = 18))

h <- 6
w <- 1.618*h
# Save plot
ggsave("Outputs/plots/Fig6_event_study_2011_callaway-santanna.pdf", width = w, height = h)

# Load data and fit the models
attgt_mh_2014 <- att_gt(yname = "malicious_homicides",
                        tname = "new_time",
                        idname = "INEGI_num",
                        gname = "min_time",
                        xformla = NULL,
                        data = df[df$time_group == 2014,],
                        control_group = "notyettreated")

agg_effects_2014 <- aggte(attgt_mh_2014, type = "dynamic", na.rm = TRUE)

# Convert results to a data frame for plotting
event_study_data_2014 <- data.frame(
  term = agg_effects_2014$egt,
  estimate = agg_effects_2014$att.egt,
  conf.low = agg_effects_2014$att.egt - 1.96 * agg_effects_2014$se.egt,
  conf.high = agg_effects_2014$att.egt + 1.96 * agg_effects_2014$se.egt
)

# Plot
ggplot(data = event_study_data_2014, aes(x = term, y = estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0) +
  theme_classic() +
  geom_hline(yintercept = 0, linetype = 2) +
  geom_vline(xintercept = 0, linetype = 2) +
  ylab("Estimated difference in conditional means \nMonthly intentional homicides") +
  xlab("Time since Mexican certification (Months, 2014 panel)") +
  scale_x_continuous(breaks=c(-24, 0, 24, 48, 72)) +
  scale_y_continuous(limits = c(-5, 5)) +
  theme(axis.text = element_text(size = 14), axis.title = element_text(size = 18))

h <- 6
w <- 1.618*h
# Save plot
ggsave("Outputs/plots/Fig7_event_study_2014_callaway-santanna.pdf", width = w, height = h)

nsizes <- df %>% 
  group_by(treat_group, time_group) %>% 
  distinct(INEGI) %>% 
  summarize(n = n())


#### EVENT STUDY - VIOLENCE, US TREATMENT, CALLAWAY-SANT'ANNA SEs ####

df$treat_group_us <- NA_character_
us_trade_means <- df %>% 
  group_by(INEGI) %>% 
  summarize(us_trade_mean = mean(ustrade, na.rm=T))
df$treat_group_us[df$INEGI %in% us_trade_means$INEGI[us_trade_means$us_trade_mean<1 & us_trade_means$us_trade_mean>0]] <- "Turnson"
df$treat_group_us[df$INEGI %in% us_trade_means$INEGI[us_trade_means$us_trade_mean==1]] <- "Alwayson"
df$treat_group_us[df$INEGI %in% us_trade_means$INEGI[us_trade_means$us_trade_mean==0]] <- "Control"

nsizes_us <- df %>% 
  group_by(treat_group_us, time_group) %>% 
  distinct(INEGI) %>% 
  summarize(n = n())

# Load data and fit the models
attgt_mh_2011 <- att_gt(yname = "malicious_homicides",
                        tname = "new_time",
                        idname = "INEGI_num",
                        gname = "min_time_us",
                        xformla = NULL,
                        data = df[df$time_group == 2011,],
                        control_group = "notyettreated")

agg_effects_2011 <- aggte(attgt_mh_2011, type = "dynamic", na.rm = TRUE)

# Convert results to a data frame for plotting
event_study_data_2011 <- data.frame(
  term = agg_effects_2011$egt,
  estimate = agg_effects_2011$att.egt,
  conf.low = agg_effects_2011$att.egt - 1.96 * agg_effects_2011$se.egt,
  conf.high = agg_effects_2011$att.egt + 1.96 * agg_effects_2011$se.egt
)
# View(event_study_data_2011)

# Plot
ggplot(data = event_study_data_2011, aes(x = term, y = estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0) +
  theme_classic() +
  geom_hline(yintercept = 0, linetype = 2) +
  geom_vline(xintercept = 0, linetype = 2) +
  ylab("Estimated difference in conditional means \nMonthly intentional homicides") +
  xlab("Time since US trade (Months, 2011 panel)") +
  scale_x_continuous(limits = c(-100, 110), breaks=c(-96, -72, -48, -24, 0, 24, 48, 72, 96)) +
  scale_y_continuous(limits = c(-5, 5)) +
  theme(axis.text = element_text(size = 14), axis.title = element_text(size = 18))

h <- 6
w <- 1.618*h
# Save plot
ggsave("Outputs/plots/Fig8_event_study_2011_callaway-santanna_us_treat.pdf", width = w, height = h)


### UNABLE TO DO A 2014 GROUP FOR THE US BECAUSE
#NO UNITS WHICH ENTER THE DATA IN 2014 BECOME TREATED WITH US TRADE


#### DIFF-IN-DIFFS - MISSING PERSONS ####

mis <- read.csv("Source data/mis.csv", stringsAsFactors = F)

#combine state and municipality and see how many matches in df
#do same in df
df$state_mun <- paste(df$State, df$Municipality, sep="; ")
df_state_muns <- unique(df$state_mun)

#convert time to Date class
mis <- mis[mis$time!="NO ESPECIFICADO",]
mis <- mis[mis$time!="NO DISPONIBLE",]

mis$time <- as.Date(mis$time, format="%m/%d/%Y")

#subset df to only municipalities in mis
df_mis <- df[df$state_mun %in% mis$state_mun,]

#convert time to year month for both df_mis and mis
df_mis$time_mo <- format(df_mis$time, "%Y-%m")
mis$time_mo <- format(mis$time, "%Y-%m")

#sum missing persons in df
# Compute the counts
counts <- mis %>%
  group_by(state_mun, time_mo) %>%
  summarise(count = n())
# Merge the counts back to df_mis
df_mis <- df_mis %>%
  left_join(counts, by = c("state_mun", "time_mo"))
df_mis$mis_per <- NA_real_
df_mis$mis_per <- df_mis$count
df_mis$mis_per[is.na(df_mis$mis_per)] <- 0

#remove dates after missing persons data ends
sort(unique(df_mis$time_mo[!df_mis$time_mo %in% mis$time_mo]))
df_mis <- df_mis[df_mis$time_mo %in% mis$time_mo,]

#DIFF-IN-DIFFS

### MISSING PERSONS ###

lm1mp <- lm_robust(mis_per ~ trade, data=df_mis, clusters=INEGI,
                   fixed_effects = ~factor(INEGI) + factor(time),
                   se_type="stata")
lm2mp <- lm_robust(mis_per ~ trade + ustrade, data=df_mis, clusters=INEGI,
                   fixed_effects = ~factor(INEGI) + factor(time),
                   se_type="stata")
lm3mp <- lm_robust(mis_per ~ trade + ustrade + Total_spending, data=df_mis, clusters=INEGI,
                   fixed_effects = ~factor(INEGI) + factor(time),
                   se_type="stata")
lm4mp <- lm_robust(mis_per ~ trade + ustrade + Total_spending + time*factor(INEGI), data=df_mis, clusters=INEGI,
                   fixed_effects = ~factor(time),
                   se_type="stata")
summary(lm1mp)
summary(lm2mp)
summary(lm3mp)
summary(lm4mp)

#
#### DIFF-IN-DIFFS - DISAGGREGATED GROUP-TIME ATEs ####

df <- df1

#group n-sizes
table(df$min_time)
table(df$min_time_us)
#group cluster sizes
temp <- df %>% distinct(INEGI, .keep_all = T)
table(temp$min_time)
table(temp$min_time_us)


#2011 panel

attgt_mh_2011 <- att_gt(yname = "malicious_homicides",
                  tname = "new_time",
                  idname = "INEGI_num",
                  gname = "min_time",
                  xformla = NULL,
                  data = df[df$time_group==2011,],
                  control_group = "nevertreated",
                  clustervars = "INEGI")
attgt_hom_2011 <- att_gt(yname = "homicides",
                        tname = "new_time",
                        idname = "INEGI_num",
                        gname = "min_time",
                        xformla = NULL,
                        data = df[df$time_group==2011,],
                        control_group = "nevertreated",
                        clustervars = "INEGI")
attgt_gh_2011 <- att_gt(yname = "gun_homicides",
                        tname = "new_time",
                        idname = "INEGI_num",
                        gname = "min_time",
                        xformla = NULL,
                        data = df[df$time_group==2011,],
                        control_group = "nevertreated",
                        clustervars = "INEGI")
attgt_mis_per_2011 <- att_gt(yname = "mis_per",
                             tname = "new_time",
                             idname = "INEGI_num",
                             gname = "min_time",
                             xformla = NULL,
                             data = df_mis[df_mis$time_group==2011,],
                             control_group = "nevertreated",
                             clustervars = "INEGI")
attgt_hr_2011 <- att_gt(yname = "home_robbery",
                        tname = "new_time",
                        idname = "INEGI_num",
                        gname = "min_time",
                        xformla = NULL,
                        data = df[df$time_group==2011,],
                        control_group = "nevertreated",
                        clustervars = "INEGI")
attgt_br_2011 <- att_gt(yname = "business_robbery",
                        tname = "new_time",
                        idname = "INEGI_num",
                        gname = "min_time",
                        xformla = NULL,
                        data = df[df$time_group==2011,],
                        control_group = "nevertreated",
                        clustervars = "INEGI")
attgt_tr_2011 <- att_gt(yname = "transport_robbery",
                        tname = "new_time",
                        idname = "INEGI_num",
                        gname = "min_time",
                        xformla = NULL,
                        data = df[df$time_group==2011,],
                        control_group = "nevertreated",
                        clustervars = "INEGI")
attgt_pr_2011 <- att_gt(yname = "personal_robbery",
                        tname = "new_time",
                        idname = "INEGI_num",
                        gname = "min_time",
                        xformla = NULL,
                        data = df[df$time_group==2011,],
                        control_group = "nevertreated",
                        clustervars = "INEGI")
attgt_e_2011 <- att_gt(yname = "extortion",
                        tname = "new_time",
                        idname = "INEGI_num",
                        gname = "min_time",
                        xformla = NULL,
                        data = df[df$time_group==2011,],
                       control_group = "nevertreated",
                       clustervars = "INEGI")

attgt_mh_us_2011 <- att_gt(yname = "malicious_homicides",
                           tname = "new_time",
                           idname = "INEGI_num",
                           gname = "min_time_us",
                           data = df[df$time_group==2011,],
                           control_group = "nevertreated",
                           clustervars = "INEGI"
)

attgt_hom_us_2011 <- att_gt(yname = "homicides",
                         tname = "new_time",
                         idname = "INEGI_num",
                         gname = "min_time_us",
                         xformla = NULL,
                         data = df[df$time_group==2011,],
                         control_group = "notyettreated",
                         clustervars = "INEGI")
attgt_gh_us_2011 <- att_gt(yname = "gun_homicides",
                        tname = "new_time",
                        idname = "INEGI_num",
                        gname = "min_time_us",
                        xformla = NULL,
                        data = df[df$time_group==2011,],
                        control_group = "nevertreated",
                        clustervars = "INEGI")
attgt_mis_per_us_2011 <- att_gt(yname = "mis_per",
                             tname = "new_time",
                             idname = "INEGI_num",
                             gname = "min_time_us",
                             xformla = NULL,
                             data = df_mis[df_mis$time_group==2011,],
                             control_group = "nevertreated",
                             clustervars = "INEGI")
attgt_hr_us_2011 <- att_gt(yname = "home_robbery",
                        tname = "new_time",
                        idname = "INEGI_num",
                        gname = "min_time_us",
                        xformla = NULL,
                        data = df[df$time_group==2011,],
                        control_group = "nevertreated",
                        clustervars = "INEGI")
attgt_br_us_2011 <- att_gt(yname = "business_robbery",
                        tname = "new_time",
                        idname = "INEGI_num",
                        gname = "min_time_us",
                        xformla = NULL,
                        data = df[df$time_group==2011,],
                        control_group = "nevertreated",
                        clustervars = "INEGI")
attgt_tr_us_2011 <- att_gt(yname = "transport_robbery",
                        tname = "new_time",
                        idname = "INEGI_num",
                        gname = "min_time_us",
                        xformla = NULL,
                        data = df[df$time_group==2011,],
                        control_group = "nevertreated",
                        clustervars = "INEGI")
attgt_pr_us_2011 <- att_gt(yname = "personal_robbery",
                        tname = "new_time",
                        idname = "INEGI_num",
                        gname = "min_time_us",
                        xformla = NULL,
                        data = df[df$time_group==2011,],
                        control_group = "nevertreated",
                        clustervars = "INEGI")
attgt_e_us_2011 <- att_gt(yname = "extortion",
                       tname = "new_time",
                       idname = "INEGI_num",
                       gname = "min_time_us",
                       xformla = NULL,
                       data = df[df$time_group==2011,],
                       control_group = "nevertreated",
                       clustervars = "INEGI")



#2014 panel

attgt_mh_2014 <- att_gt(yname = "malicious_homicides",
                        tname = "new_time",
                        idname = "INEGI_num",
                        gname = "min_time",
                        xformla = NULL,
                        data = df[df$time_group==2014,],
                        control_group = "nevertreated",
                        clustervars = "INEGI")
attgt_hom_2014 <- att_gt(yname = "homicides",
                         tname = "new_time",
                         idname = "INEGI_num",
                         gname = "min_time",
                         xformla = NULL,
                         data = df[df$time_group==2014,],
                         control_group = "nevertreated",
                         clustervars = "INEGI")
attgt_gh_2014 <- att_gt(yname = "gun_homicides",
                        tname = "new_time",
                        idname = "INEGI_num",
                        gname = "min_time",
                        xformla = NULL,
                        data = df[df$time_group==2014,],
                        control_group = "nevertreated",
                        clustervars = "INEGI")
attgt_mis_per_2014 <- att_gt(yname = "mis_per",
                             tname = "new_time",
                             idname = "INEGI_num",
                             gname = "min_time",
                             xformla = NULL,
                             data = df_mis[df_mis$time_group==2014,],
                             control_group = "nevertreated",
                             clustervars = "INEGI")
attgt_hr_2014 <- att_gt(yname = "home_robbery",
                        tname = "new_time",
                        idname = "INEGI_num",
                        gname = "min_time",
                        xformla = NULL,
                        data = df[df$time_group==2014,],
                        control_group = "nevertreated",
                        clustervars = "INEGI")
attgt_br_2014 <- att_gt(yname = "business_robbery",
                        tname = "new_time",
                        idname = "INEGI_num",
                        gname = "min_time",
                        xformla = NULL,
                        data = df[df$time_group==2014,],
                        control_group = "nevertreated",
                        clustervars = "INEGI")
attgt_tr_2014 <- att_gt(yname = "transport_robbery",
                        tname = "new_time",
                        idname = "INEGI_num",
                        gname = "min_time",
                        xformla = NULL,
                        data = df[df$time_group==2014,],
                        control_group = "nevertreated",
                        clustervars = "INEGI")
attgt_pr_2014 <- att_gt(yname = "personal_robbery",
                        tname = "new_time",
                        idname = "INEGI_num",
                        gname = "min_time",
                        xformla = NULL,
                        data = df[df$time_group==2014,],
                        control_group = "nevertreated",
                        clustervars = "INEGI")
attgt_e_2014 <- att_gt(yname = "extortion",
                       tname = "new_time",
                       idname = "INEGI_num",
                       gname = "min_time",
                       xformla = NULL,
                       data = df[df$time_group==2014,],
                       control_group = "nevertreated",
                       clustervars = "INEGI")

#simple aggregation of group effects
agg.simple_mh_2011 <- aggte(attgt_mh_2011, type = "simple", na.rm=T)
agg.simple_hom_2011 <- aggte(attgt_hom_2011, type = "simple", na.rm=T)
agg.simple_gh_2011 <- aggte(attgt_gh_2011, type = "simple", na.rm=T)
agg.simple_mis_per_2011 <- aggte(attgt_mis_per_2011, type = "simple", na.rm=T)
agg.simple_hr_2011 <- aggte(attgt_hr_2011, type = "simple", na.rm=T)
agg.simple_br_2011 <- aggte(attgt_br_2011, type = "simple", na.rm=T)
agg.simple_tr_2011 <- aggte(attgt_tr_2011, type = "simple", na.rm=T)
agg.simple_pr_2011 <- aggte(attgt_pr_2011, type = "simple", na.rm=T)
agg.simple_e_2011 <- aggte(attgt_e_2011, type = "simple", na.rm=T)

agg.simple_mh_us_2011 <- aggte(attgt_mh_us_2011, type = "simple", na.rm=T)
agg.simple_hom_us_2011 <- aggte(attgt_hom_us_2011, type = "simple", na.rm=T)
agg.simple_gh_us_2011 <- aggte(attgt_gh_us_2011, type = "simple", na.rm=T)
agg.simple_mis_per_us_2011 <- aggte(attgt_mis_per_us_2011, type = "simple", na.rm=T)
agg.simple_hr_us_2011 <- aggte(attgt_hr_us_2011, type = "simple", na.rm=T)
agg.simple_br_us_2011 <- aggte(attgt_br_us_2011, type = "simple", na.rm=T)
agg.simple_tr_us_2011 <- aggte(attgt_tr_us_2011, type = "simple", na.rm=T)
agg.simple_pr_us_2011 <- aggte(attgt_pr_us_2011, type = "simple", na.rm=T)
agg.simple_e_us_2011 <- aggte(attgt_e_us_2011, type = "simple", na.rm=T)

agg.simple_mh_2014 <- aggte(attgt_mh_2014, type = "simple", na.rm=T)
agg.simple_hom_2014 <- aggte(attgt_hom_2014, type = "simple", na.rm=T)
agg.simple_gh_2014 <- aggte(attgt_gh_2014, type = "simple", na.rm=T)
agg.simple_mis_per_2014 <- aggte(attgt_mis_per_2014, type = "simple", na.rm=T)
agg.simple_hr_2014 <- aggte(attgt_hr_2014, type = "simple", na.rm=T)
agg.simple_br_2014 <- aggte(attgt_br_2014, type = "simple", na.rm=T)
agg.simple_tr_2014 <- aggte(attgt_tr_2014, type = "simple", na.rm=T)
agg.simple_pr_2014 <- aggte(attgt_pr_2014, type = "simple", na.rm=T)
agg.simple_e_2014 <- aggte(attgt_e_2014, type = "simple", na.rm=T)

# Open a connection to a file
output_file <- file("Outputs/TA5-A7_summary_output.txt", "w")

# Define the list of objects and their names for clarity
summary_items <- list(
  "agg.simple_mh_2011" = agg.simple_mh_2011,
  "agg.simple_hom_2011" = agg.simple_hom_2011,
  "agg.simple_gh_2011" = agg.simple_gh_2011,
  "agg.simple_mis_per_2011" = agg.simple_mis_per_2011,
  "agg.simple_hr_2011" = agg.simple_hr_2011,
  "agg.simple_br_2011" = agg.simple_br_2011,
  "agg.simple_tr_2011" = agg.simple_tr_2011,
  "agg.simple_pr_2011" = agg.simple_pr_2011,
  "agg.simple_e_2011" = agg.simple_e_2011,
  
  "agg.simple_mh_2014" = agg.simple_mh_2014,
  "agg.simple_hom_2014" = agg.simple_hom_2014,
  "agg.simple_gh_2014" = agg.simple_gh_2014,
  "agg.simple_mis_per_2014" = agg.simple_mis_per_2014,
  "agg.simple_hr_2014" = agg.simple_hr_2014,
  "agg.simple_br_2014" = agg.simple_br_2014,
  "agg.simple_tr_2014" = agg.simple_tr_2014,
  "agg.simple_pr_2014" = agg.simple_pr_2014,
  "agg.simple_e_2014" = agg.simple_e_2014,
  
  "df_2011_nrow" = nrow(df[df$time_group == 2011, ]),
  "df_2011_unique" = length(unique(df$INEGI[df$time_group == 2011])),
  "df_mis_2011_nrow" = nrow(df_mis[df_mis$time_group == 2011, ]),
  "df_mis_2011_unique" = length(unique(df_mis$INEGI[df_mis$time_group == 2011])),
  
  "agg.simple_mh_us_2011" = agg.simple_mh_us_2011,
  "agg.simple_hom_us_2011" = agg.simple_hom_us_2011,
  "agg.simple_gh_us_2011" = agg.simple_gh_us_2011,
  "agg.simple_mis_per_us_2011" = agg.simple_mis_per_us_2011,
  "agg.simple_hr_us_2011" = agg.simple_hr_us_2011,
  "agg.simple_br_us_2011" = agg.simple_br_us_2011,
  "agg.simple_tr_us_2011" = agg.simple_tr_us_2011,
  "agg.simple_pr_us_2011" = agg.simple_pr_us_2011,
  "agg.simple_e_us_2011" = agg.simple_e_us_2011,
  
  "df_2014_nrow" = nrow(df[df$time_group == 2014, ]),
  "df_2014_unique" = length(unique(df$INEGI[df$time_group == 2014])),
  "df_mis_2014_nrow" = nrow(df_mis[df_mis$time_group == 2014, ]),
  "df_mis_2014_unique" = length(unique(df_mis$INEGI[df_mis$time_group == 2014]))
)

# Loop through the list and capture the output of each command
for (name in names(summary_items)) {
  item <- summary_items[[name]]
  
  # Capture the output
  if (class(item) == "summaryDefault" || class(item) == "summary.lm") {
    output <- capture.output(summary(item))
  } else {
    output <- capture.output(item)
  }
  
  # Write the name of the item for clarity
  writeLines(paste("Summary of", name), output_file)
  
  # Write the output to the file
  writeLines(output, output_file)
  
  # Add a separator for clarity between outputs
  writeLines("\n\n---\n\n", output_file)
}

# Close the file connection
close(output_file)



#### RESULTS FOR PAPER ####

#diff-in-diff regression results
#intentional hom
texreg(l=list(lm1mh, lm2mh, lm3mh, lm4mh), include.ci=F, include.rmse=F, omit.coef = "factor", file="Outputs/T2_mhregs.txt")
#gun hom
texreg(l=list(lm1gh, lm2gh, lm3gh, lm4gh), include.ci=F, include.rmse=F, omit.coef = "factor", file="Outputs/TA2_ghregs.txt")
#hom
texreg(l=list(lm1h, lm2h, lm3h, lm4h), include.ci=F, include.rmse=F, omit.coef = "factor", file="Outputs/TA1_hregs.txt")
#mis per
texreg(l=list(lm1mp, lm2mp, lm3mp, lm4mp), include.ci=F, include.rmse=F, omit.coef = "factor", file="Outputs/TA3_mpregs.txt")
#placebos
texreg(l=list(lm4hr, lm4br, lm4tr, lm4pr, lm4ex), include.ci=F, include.rmse=F, omit.coef = "factor", file="Outputs/TA4_placeboregs.txt")

#### LOAD AUTODEFENSA DATA ####

df1 <- df
inegis_missing_spend <- unique(df$INEGI[is.na(df$Total_spending)])
df <- df[!df$INEGI %in% inegis_missing_spend,]

mich_muns <- unique(df$Municipality[df$State=="MICHOACAN"])

#load municipality_treatment_times
treat <- read.csv("Source data/municipality_treatment_times.csv")

#load mexico_municipality_pop
pop <- read.csv("Source data/mexico_municipality_pop.csv")

#### MERGE AUTODEFENSA DATA ####

#check how many municipalities in michoacan
length(unique(pop$name[pop$State=="MICHOACAN"])) #they're all there. good

#subset pop to just michoacan
pop <- pop[pop$State=="MICHOACAN",]
pop <- pop[pop$year==max(pop$year, na.rm=T),]

#subset treat to just michoacan
treat <- treat[treat$state=="MICHOACAN",]

#check if all of treat names are in pop
length(treat$municipality %in% pop$name) #they are. good

#merge treat to pop
#select only columns we care about
treat <- treat[,c("municipality", "document.date", "est_us_date")]
treat$document.date <- as.Date(treat$document.date)
treat$est_us_date <- as.Date(treat$est_us_date)
library(dplyr)
df <- left_join(pop, treat, by=c("name"="municipality"))


#### AUTODEFENSA DATA MERGING ####

#create treated at time of study column
#for fuentes diaz study
df$treat_fd <- 0
df$treat_fd[df$document.date<=as.Date("2014-07-31")] <- 1
df$us_treat_fd <- 0
df$us_treat_fd[df$est_us_date<=as.Date("2014-07-31")] <- 1
#for mexican government study
df$treat_cndh <- 0
df$treat_cndh[df$document.date<=as.Date("2014-05-31")] <- 1
df$us_treat_cndh <- 0
df$us_treat_cndh[df$est_us_date<=as.Date("2014-05-31")] <- 1


#create autodefensa existence variable
auto <- read.csv("Source data/autodefensas.csv")
auto$autodefensa_fuentes_diaz[is.na(auto$autodefensa_fuentes_diaz)] <- 0
auto$autodefensa_cndh[is.na(auto$autodefensa_cndh)] <- 0
#join with df
df <- left_join(df, auto, by=c("name"="Name"))


#### AUTODEFENSA ANALYSIS ####

df1 <- df[df$name %in% mich_muns,]
df2 <- df

#TABLE 1 of autodefensa analysis

#see if places which were treated were more likely to have autodefensas
lm1 <- lm(autodefensa_fuentes_diaz ~ treat_fd, data=df1)
lm2 <- lm(autodefensa_fuentes_diaz ~ us_treat_fd, data=df1)
lm3 <- lm(autodefensa_fuentes_diaz ~ treat_fd + us_treat_fd, data=df1)
lm4 <- lm(autodefensa_cndh ~ treat_cndh, data=df1)
lm5 <- lm(autodefensa_cndh ~ us_treat_cndh, data=df1)
lm6 <- lm(autodefensa_cndh ~ treat_cndh + us_treat_cndh, data=df1)
summary(lm1)
summary(lm2)
summary(lm3)
summary(lm4)
summary(lm5)
summary(lm6)

#regression table
texreg(l=list(lm1, lm2, lm3, lm4, lm5, lm6), include.ci=F, include.rmse=F, file="Outputs/T3_autodefensa1.txt")



#TABLE 2 of autodefensa analysis

#see if places which were treated were more likely to have autodefensas
lm7 <- lm(autodefensa_fuentes_diaz ~ treat_fd, data=df2)
lm8 <- lm(autodefensa_fuentes_diaz ~ us_treat_fd, data=df2)
lm9 <- lm(autodefensa_fuentes_diaz ~ treat_fd + us_treat_fd, data=df2)
lm10 <- lm(autodefensa_cndh ~ treat_cndh, data=df2)
lm11 <- lm(autodefensa_cndh ~ us_treat_cndh, data=df2)
lm12 <- lm(autodefensa_cndh ~ treat_cndh + us_treat_cndh, data=df2)
summary(lm7)
summary(lm8)
summary(lm9)
summary(lm10)
summary(lm11)
summary(lm12)

#regression table
texreg(l=list(lm7, lm8, lm9, lm10, lm11, lm12), include.ci=F, include.rmse=F, file="Outputs/TA8_autodefensa2.txt")
